Introduction

This R markdown file will contain and outline code that we use to analyze CFTR SNPs on a cohort of diabetic patients and controls. This is the final steps of the workflow.

Inputting Files

The first step is inputting the data from the csv file. This is done below. We have 3514 samples from SRA.

rm(list = ls())
diabetes.data <- read.csv("new_diabetes_and_cftr.csv")
head(diabetes.data)
##   Disease.State        SRA Heterozygous.SNPs             Homozygous.SNPs
## 1        normal ERR1630013                                              
## 2        normal ERR1630014                   10229820;73715573;886044712
## 3        normal ERR1630015                   10229820;886044712;73715573
## 4        normal ERR1630016                            886044712;73715573
## 5        normal ERR1630017                                      73715573
## 6        normal ERR1630018                            886044712;73715573
dim(diabetes.data)
## [1] 3514    4

The next step is to develop a binary matrix containing data whether a certain SNP is present in an SRA sample or not.

This is done as follows

##number of rows in diabetic data/same as total SRA samples
rows.diabetes.data <- dim(diabetes.data)[1]

##all.snps captures all snps in homozygous and heterozygous set
all.snps <- c()
for (i in 1:rows.diabetes.data){
    entry.set.homozy <- unlist(strsplit(as.character(diabetes.data$Homozygous.SNPs[i]), split = ";"))
    entry.set.heterz <- unlist(strsplit(as.character(diabetes.data$Heterozygous.SNPs[i]), split = ";"))
    all.snps <- c(all.snps, entry.set.homozy, entry.set.heterz)
}

##takes the unique SNPs
unique.snps <- unique(all.snps)

##develops and writes a matrix.snps that is binary 1 and 0 matrix describing whether a SNP is present or absent in a particular SRA sample
matrix.snps <- matrix(0, nrow = rows.diabetes.data, length(unique.snps))
rownames(matrix.snps) <- diabetes.data$SRA
colnames(matrix.snps) <- unique.snps

for (i in 1:length(unique.snps)){
   index.homoz <- grep(pattern = colnames(matrix.snps)[i], as.character(diabetes.data$Homozygous.SNPs))
   index.heter <- grep(pattern = colnames(matrix.snps)[i], as.character(diabetes.data$Heterozygous.SNPs))
   matrix.snps[index.homoz,i] <- 1
   matrix.snps[index.heter,i] <- 1
}

dim(matrix.snps)
## [1] 3514  189

We have 2514 samples with 189 SNPs

A meta object is also made that contains sample information on disease state

meta.obj <- data.frame('name' = diabetes.data$SRA, 'Disease' = diabetes.data$Disease.State)
rownames(meta.obj) <- diabetes.data$SRA
SNP Frequency

We can see the SNP frequency as below.

Snp.Frequency <- colSums(matrix.snps)
hist(Snp.Frequency, breaks = 100)

Principal Component Analysis

We can do a principal component analysis of the SNP data. This is done as follows.

pca.diabetes <- prcomp(matrix.snps)
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
pca1.diab <- pca.diabetes$x[,"PC1"]
pca2.diab <- pca.diabetes$x[,"PC2"]

pca.df <- data.frame('PC1' = pca1.diab, 'PC2' = pca2.diab)
pca.df <- data.frame('name' = meta.obj$name, pca.df, 'Disease' = meta.obj$Disease)
plot_ly(data = pca.df, x = ~PC1, y = ~PC2, text = ~name, color = ~Disease, colors = c("black", "red"))
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
Chi-Square Test

Let’s do a series of chi-square tests.

Make a function for a test matrix.

test.matrix.snp <- function(index.snp){
    snp.vector <- matrix.snps[,index.snp]
    snp.vector.disease <- snp.vector[diabetes.data$Disease.State == 'type ii diabetes mellitus']
    snp.vector.normal <- snp.vector[diabetes.data$Disease.State == 'normal']
    matrix.store <- matrix(0,nrow = 2, ncol = 2)
    rownames(matrix.store) <- c("snp+", "snp-")
    colnames(matrix.store) <- c("disease", "normal")
    matrix.store[1,1] <- sum(snp.vector.disease)
    matrix.store[2,1] <- length(snp.vector.disease) - sum(snp.vector.disease)
    matrix.store[1,2] <- sum(snp.vector.normal)
    matrix.store[2,2] <- length(snp.vector.normal) - sum(snp.vector.normal)
    test.matrix.snp <- matrix.store
    return(test.matrix.snp)
    
}

Do some sanity checks on the test matrices

sum.test <- c()
for (i in 1:length(unique.snps)){
    sum.test[i] <- sum(test.matrix.snp(i))
}

disease.count <- c()
for (i in 1:length(unique.snps)){
    disease.count[i] <- colSums(test.matrix.snp(i))[1]
}

normal.count <- c()
for (i in 1:length(unique.snps)){
    normal.count[i] <- colSums(test.matrix.snp(i))[2]
}

The chi-square test is run as follows.

p.value.list <- c()

for (i in 1:length(unique.snps)){
obj <- chisq.test(test.matrix.snp(i)+1)
p.value.list <- c(p.value.list, obj$p.value)
}
## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(test.matrix.snp(i) + 1): Chi-squared approximation
## may be incorrect
names(p.value.list) <- as.character(unique.snps)

hist(p.value.list)

p.adjust.list <- p.adjust(p.value.list, method = "bonferroni")

hist(p.adjust.list)

Fisher’s Exact Test

We can run Fisher’s Exact test.

p.value.list.fisher <- c()

for (i in 1:length(unique.snps)){
obj <- fisher.test(test.matrix.snp(i))
p.value.list.fisher <- c(p.value.list.fisher, obj$p.value)
}

names(p.value.list.fisher) <- as.character(unique.snps)

hist(p.value.list.fisher)

p.adjust.list.fisher <- p.adjust(p.value.list.fisher, method = "bonferroni")

hist(p.adjust.list.fisher)

Incorporating More Sample Data

Given that these individual SRA ids are not patient data but sample data we need more information detailing information on each of these samples.

add.samp.meta.read <- read.table(file = "E-MTAB-5061.sdrf.txt", sep = "\t")
add.samp.meta <- add.samp.meta.read[2:dim(add.samp.meta.read)[1],]

for (i in 1:39){
colnames(add.samp.meta)[i] <- as.character(add.samp.meta.read[1,i])
}
rownames(add.samp.meta) <- add.samp.meta$`Comment[ENA_RUN]`

add.samp.meta <- data.frame(add.samp.meta, 'name' = rownames(add.samp.meta))

samp.total.meta <- add.samp.meta[rownames(meta.obj),]
dim(samp.total.meta)
## [1] 3514   40

We want to redo this analysis by only taking cell samples that are of decent quality

good.quality.list <- which(samp.total.meta$Characteristics.single.cell.well.quality == 'OK')
length(good.quality.list)
## [1] 2209

From 3512 samples, only 2209 are of good quality.

Let’s get a matrix and meta object with only good quality samples.

meta.quality <- samp.total.meta[good.quality.list,]
matrix.quality.snps <- matrix.snps[good.quality.list,]
PCA by Cell Types
pca.quality.diabetes <- prcomp(matrix.quality.snps)
library(plotly)
pca1.diab.q <- pca.quality.diabetes$x[,"PC1"]
pca2.diab.q <- pca.quality.diabetes$x[,"PC2"]

pca.df.q <- data.frame('PC1' = pca1.diab.q, 'PC2' = pca2.diab.q)
pca.df.q <- data.frame('name' = meta.quality$name, pca.df.q, 'Cell' = meta.quality$Characteristics.cell.type.)
plot_ly(data = pca.df.q, x = ~PC1, y = ~PC2, text = ~name, color = ~Cell, colors = c("black", "blue", "red", "green", "yellow", "orange", "magenta", "gray", "brown", "purple", "pink", "gold", "darkred", "darkgreen", "darkblue", "darkorange"))
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode

Let’s remove the ductal, acinar, and unclassified cells from the analysis.

noductal.list <- which(meta.quality$Characteristics.cell.type. != "ductal cell" & meta.quality$Characteristics.cell.type != "unclassified cell" & meta.quality$Characteristics.cell.type != "acinar cell")
meta.quality.nd <- meta.quality[noductal.list,]
matrix.quality.snps.nd <- matrix.quality.snps[noductal.list,]

Redo the PCA analysis

pca.quality.diabetes.nd <- prcomp(matrix.quality.snps.nd)
library(plotly)
pca1.diab.q.nd <- pca.quality.diabetes.nd$x[,"PC1"]
pca2.diab.q.nd <- pca.quality.diabetes.nd$x[,"PC2"]

pca.df.q.nd <- data.frame('PC1' = pca1.diab.q.nd, 'PC2' = pca2.diab.q.nd)
pca.df.q.nd <- data.frame('name' = meta.quality.nd$name, pca.df.q.nd, 'Cell' = meta.quality.nd$Characteristics.cell.type., 'Patient' = meta.quality.nd$Characteristics.individual., 'Disease' = meta.quality.nd$Characteristics.disease.)
plot_ly(data = pca.df.q.nd, x = ~PC1, y = ~PC2, text = ~name, color = ~Cell, colors = c("black", "blue", "red", "green", "yellow", "orange", "magenta", "gray", "brown", "purple", "pink", "gold", "darkred", "darkgreen", "darkblue", "darkorange"), symbol = ~Disease, symbols = c("circle", "o"))
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode

We can also color code by patient ID.

plot_ly(data = pca.df.q.nd, x = ~PC1, y = ~PC2, text = ~name, color = ~Patient, colors = c("black", "blue", "red", "green", "yellow", "orange", "magenta", "gray", "brown", "purple", "pink"))
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
Ductal Cells

We will do the PCA on the ductal cells alone. First make the matrix and meta object for this information

ductal.list <- which(meta.quality$Characteristics.cell.type. == "ductal cell")
meta.quality.d <- meta.quality[ductal.list,]
matrix.quality.snps.d <- matrix.quality.snps[ductal.list,]

Then do the PCA

pca.quality.diabetes.d <- prcomp(matrix.quality.snps.d)
library(plotly)
pca1.diab.q.d <- pca.quality.diabetes.d$x[,"PC1"]
pca2.diab.q.d <- pca.quality.diabetes.d$x[,"PC2"]

pca.df.q.d <- data.frame('PC1' = pca1.diab.q.d, 'PC2' = pca2.diab.q.d)
pca.df.q.d <- data.frame('name' = meta.quality.d$name, pca.df.q.d, 'Cell' = meta.quality.d$Characteristics.cell.type., 'Patient' = meta.quality.d$Characteristics.individual., 'Disease' = meta.quality.d$Characteristics.disease.)
plot_ly(data = pca.df.q.d, x = ~PC1, y = ~PC2, text = ~name, color = ~Patient, colors = c("black", "blue", "red", "green", "yellow", "orange", "magenta", "gray", "brown", "purple", "pink", "gold", "darkred", "darkgreen", "darkblue", "darkorange"), symbol = ~Disease, symbols = c("circle", "o"))
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode

You are able to analyze how different people are able to express gene-specific variants in real time. You can see whether an individual’s ductal cells are expressing variants of CFTR gene.

Averaging by Patients

We plan to average the data by patients.

unique.patients <- unique(meta.quality.d$Characteristics.individual.)
unique.patients
##  [1] AZ           HP1502401    HP1504101T2D HP1504901    HP1506401   
##  [6] HP1507101    HP1508501T2D HP1509101    HP1525301T2D HP1526901T2D
## 11 Levels: AZ Characteristics[individual] HP1502401 ... HP1526901T2D
mat.bypatient <- c()
for (i in 1:10){
    index.patient.i <- which(meta.quality.d$Characteristics.individual. == unique.patients[i])
    mat.bypatient <- rbind(mat.bypatient, colMeans(matrix.quality.snps.d[index.patient.i,]))
}